The empty inline asm statements in marshal_atom() and unmarshal_atom()
result in incorrect code generation with Clang when passing
single-precision floats.

Therefore, implement the stores without inline assembly. The D extension
specification states:

When multiple floating-point precisions are supported, then valid values of narrower n-bit types,
n < FLEN, are represented in the lower n bits of an FLEN-bit NaN value, in a process termed
NaN-boxing. The upper bits of a valid NaN-boxed value must be all 1s.

That means we can set a temporary value to all ones, copy the float over
its first sizeof(float) bytes, and finally store the result into the
ABI_FLOAT-typed floating-point argument slot (via memcpy to avoid
violating aliasing rules).

For return values, the register contents have already been saved to
memory, and we know only the 32 bits of the float are needed, so we
simply copy those out.

--- a/src/riscv/ffi.c
+++ b/src/riscv/ffi.c
@@ -150,12 +150,14 @@ static void marshal_atom(call_builder *cb, int type, void *data) {
            reinterpret floats as doubles */
 #if ABI_FLEN >= 32
         case FFI_TYPE_FLOAT:
-            asm("" : "=f"(cb->aregs->fa[cb->used_float++]) : "0"(*(float *)data));
+            value = ~(size_t)0;
+            memcpy(&value, data, sizeof(float));
+            memcpy(&cb->aregs->fa[cb->used_float++], &value, sizeof(ABI_FLOAT));
             return;
 #endif
 #if ABI_FLEN >= 64
         case FFI_TYPE_DOUBLE:
-            asm("" : "=f"(cb->aregs->fa[cb->used_float++]) : "0"(*(double *)data));
+            memcpy(&cb->aregs->fa[cb->used_float++], data, sizeof(double));
             return;
 #endif
         default: FFI_ASSERT(0); break;
@@ -173,12 +175,12 @@ static void unmarshal_atom(call_builder *cb, int type, void *data) {
     switch (type) {
 #if ABI_FLEN >= 32
         case FFI_TYPE_FLOAT:
-            asm("" : "=f"(*(float *)data) : "0"(cb->aregs->fa[cb->used_float++]));
+            memcpy(data, &cb->aregs->fa[cb->used_float++], sizeof(float));
             return;
 #endif
 #if ABI_FLEN >= 64
         case FFI_TYPE_DOUBLE:
-            asm("" : "=f"(*(double *)data) : "0"(cb->aregs->fa[cb->used_float++]));
+            memcpy(data, &cb->aregs->fa[cb->used_float++], sizeof(double));
             return;
 #endif
     }
